####################################################################################
## xu xian feng
## 2022/09/18
## STAD analysis
####################################################################################

export config_file=~/20220915_gastric_multiple/rna_combine/analysis/config/config.sh
source ${config_file}

####################################################################################
## 公共数据
## 1、已报道的胃癌驱动基因
## 2、CGC已报道的驱动基因
${public_ref_path}

####################################################################################
## 链接样本list
## 非医院来源的正常建库的样本
use_sample=`cat ~/20220915_gastric_multiple/rna_combine/config/use_sample.list | tr '\n' '|' | sed 's/|$//'`
## 存在样本测序质量不合格，先排除下游分析
exclusion_sample=`cat ~/20220915_gastric_multiple/rna_combine/config/exclusion_sample.list | tr '\n' '|' | sed 's/|$//'`

## 只提取IM、IGC、DGC
## 癌前病变必须有IM
## JZGC00988是医院来源，癌旁151N未测
cat ~/20220915_gastric_multiple/config/Tumor_Normal_RNA.tsv | head -1 > ${config_path}/tumor_normal.list
cat ~/20220915_gastric_multiple/config/Tumor_Normal_RNA.tsv | \
grep -v -E -w ${exclusion_sample} | \
grep -E -w ${use_sample} | \
awk -F'\t' '{OFS="\t"}{if($6~"IM")print}' | \
awk -F'\t' '{OFS="\t"}{if($4=="IM" || $4=="IGC" || $4=="DGC" )print}' | \
sed 's/151N/#N\/A/' \
>> ${config_path}/tumor_normal.list

echo "sample" > ${config_path}/patients.csv
cat ${config_path}/tumor_normal.list | sed '1d' | awk -F'\t' '{print $2,$3}' | tr ' ' '\n' | sort -u | grep -v "#N/A" \
>> ${config_path}/patients.csv

####################################################################################
## 目前分析样本的list质控报告
${Rscript} ~/20220915_gastric_multiple/rna_combine/scripts/MatchSample_BamQc.R \
--bam_qc_file ~/20220915_gastric_multiple/rna_combine/Qc/multiqc_report.tsv \
--rin_file ~/20220915_gastric_multiple/config/rna_rin.tsv \
--base_line_file ${config_path}/tumor_normal.list \
--out_path ${config_path}


####################################################################################
## 表达矩阵软链接
raw_rsem_path=~/20220915_gastric_multiple/rna_combine/RSEM

for sample in `cat ${config_path}/patients.csv | sed '1d'`
do
ln -snf ${raw_rsem_path}/${sample}.rsem* ${rsme_path}
done

#########################################
## TPM
## 合并表达矩阵
${Rscript} ${scripts_path}/CollapseGeneTPM.R \
--sample_list_file ${config_path}/patients.csv  \
--rsem_path ${rsme_path} \
--out_file ${rsme_path}/CombineTPM.tsv

## 同一人同一类型的样本合并表达矩阵，去除低表达，注释Hugo_Symbol
## 只保留Normal、IM、IGC、DGC
${Rscript} ${scripts_path}/mergeClassTpm.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--rsem_file ${rsme_path}/CombineTPM.tsv \
--out_file ${rsme_path}/CombineTpm.FilterLowExpression-MergeMutiSample.tsv

#########################################
## counts用于差异表达分析
## 合并矩阵
${Rscript} ${scripts_path}/CollapseGeneCounts.R \
--sample_list_file ${config_path}/patients.csv  \
--rsem_path ${rsme_path} \
--out_file ${rsme_path}/CombineCounts.tsv

## 同一人同一类型的样本合并counts矩阵，注释Hugo_Symbol
## 去除tpm低表达的基因
## 只保留Normal、IM、IGC、DGC
${Rscript} ${scripts_path}/mergeClassCounts.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--rsem_file ${rsme_path}/CombineCounts.tsv \
--filter_tpm_file ${rsme_path}/CombineTpm.FilterLowExpression-MergeMutiSample.tsv \
--out_file ${rsme_path}/CombineCounts.FilterLowExpression-MergeMutiSample.tsv

####################################################################################
## IM和Normal、IGC和Normal、DGC和Normal的差异基因可视化
## 基于counts使用deseq2
${expressionTime_rscript} ${scripts_path}/diffGene_compute.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--out_path ${Images_path}/DiffGene \
--rsem_file ${rsme_path}/CombineCounts.FilterLowExpression-MergeMutiSample.tsv

## 定义显著差异基因的阈值
export foldchange_t=2
export q_t=0.05
## 差异基因画图,6种情况的画图
${expressionTime_rscript} ${scripts_path}/diffGene_plot_v2.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--out_path ${Images_path}/DiffGene \
--foldchange_t ${foldchange_t} \
--q_t ${q_t} \
--input_file ${Images_path}/DiffGene/DiffGene.tsv

## IM和Normal、IGC和Normal、DGC和Normal的差异基因所在HallMarks通路
${expressionTime_rscript} ${scripts_path}/diffGene_pathway.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--diff_file ${Images_path}/DiffGene/DiffGene.tsv \
--out_path ${Images_path}/DiffGene \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--foldchange_t ${foldchange_t} \
--q_t ${q_t} \
--pathway_path ~/ref/Pathway

## IM和Normal、IGC和Normal、DGC和Normal的差异基因所在STAD和胃算分泌、BMP相关通路
${expressionTime_rscript} ${scripts_path}/diffGene_pathway.STAD.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--diff_file ${Images_path}/DiffGene/DiffGene.tsv \
--out_path ${Images_path}/DiffGene \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--foldchange_t ${foldchange_t} \
--q_t ${q_t} \
--pathway_path ~/ref/Pathway/stad

####################################################################################
## 表达的动态变化聚类
## 基于差异基因
<<EOF
${expressionTime_rscript} ${scripts_path}/time_Mfuzz.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--out_path ${Images_path}/mfuzz \
--diff_file ${Images_path}/DiffGene/DiffGene.tsv \
--foldchange_t ${foldchange_t} \
--q_t ${q_t} \
--rsem_file ${rsme_path}/CombineTpm.FilterLowExpression-MergeMutiSample.tsv
EOF
## 使用deseq2依据counts标准化处理的文件
${expressionTime_rscript} ${scripts_path}/time_Mfuzz_v2.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--out_path ${Images_path}/mfuzz_v2 \
--diff_file ${Images_path}/DiffGene/DiffGene.tsv \
--foldchange_t ${foldchange_t} \
--q_t ${q_t} \
--rsem_file ${Images_path}/DiffGene/CombineCounts.FilterLowExpression-MergeMutiSample.varianceStabilizingTransformation.tsv 

## wgcna,构建module
## 输入的文件为差异基因
${expressionTime_rscript} ${scripts_path}/tpm_wgcna_v2.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--rsem_file ${Images_path}/DiffGene/CombineCounts.FilterLowExpression-MergeMutiSample.varianceStabilizingTransformation.tsv \
--out_path ${Images_path}/wgcnv

## 注释基因的重要性
## 1、已报道的胃癌的驱动基因
## 2、已报道的CGC
## 3、hallmarks定义的肿瘤相关通路
## 4、是否为hub基因
${Rscript} ${scripts_path}/annotation_Mfuzz_v2.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--igc_cluster_file ${Images_path}/mfuzz_v2/mfuzz_plot_IGC.tsv \
--dgc_cluster_file ${Images_path}/mfuzz_v2/mfuzz_plot_DGC.tsv \
--smg_file ${public_ref_path}/SMG_sort.list \
--cgc_file ${public_ref_path}/cancer_gene_census.list \
--hll_pathway_file ${ref_path}/Pathway/h.all.v7.5.1.symbols.txt \
--stad_pathway_file ${ref_path}/Pathway/stad/STADPathway.txt \
--gastric_pathway_file ${ref_path}/Pathway/stad/GastricIMPathway.txt \
--hub_file ${Images_path}/wgcnv/wgcnv_HubModule.tsv \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--out_path ${Images_path}/mfuzz_v2

<<EOF
## 给基因判断证据等级
## level：既为已报道(胃癌的smg或cgc)、又在hallmarks通路、又是hub基因
## leve2：为已报道(胃癌的smg或cgc)、hallmarks通路、hub基因的任二
## leve3：为已报道(胃癌的smg或cgc)、hallmarks通路、hub基因的任一
${Rscript} ${scripts_path}/selectImportantGene.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--igc_cluster_file ${Images_path}/mfuzz/mfuzz_plot_IGC.annotation.tsv \
--dgc_cluster_file ${Images_path}/mfuzz/mfuzz_plot_DGC.annotation.tsv \
--rsem_file ${rsme_path}/CombineTpm.FilterLowExpression-MergeMutiSample.tsv \
--out_path ${Images_path}/importGene_v1
EOF


####################################################################################
## 免疫浸润情况
## cibersort推测细胞组成
${expressionTime_rscript} ${scripts_path}/immune_cibersort.R \
--rsem_file ${rsme_path}/CombineTPM.tsv \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--out_path ${Images_path}/cibersort \
--cibersort_path ${tools_path}/StandTools/Cibersort

## 描述免疫浸润细胞的动态变化
${expressionTime_rscript} ${scripts_path}/immune_cibersort_match.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--cibersort_file ${Images_path}/cibersort/cibersort.tsv \
--out_path ${Images_path}/cibersort

####################################################################################
## 干细胞评分
## 干细胞评分和表达的关系
${expressionTime_rscript} ${scripts_path}/stemScore_calculate.R \
--sample_list_file ${config_path}/tumor_normal.list \
--signature_file ${tools_path}/StandTools/Cibersort/SC-pcbc-stemsig.tsv \
--rsem_file_forcor ${rsme_path}/CombineTpm.FilterLowExpression-MergeMutiSample.tsv \
--rsem_file_forstem ${rsme_path}/CombineTPM.tsv \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--out_path ${Images_path}/stemness

####################################################################################
## DDR评分
${expressionTime_rscript} ${scripts_path}/GSVA_DnaDamage.R \
--rsem_file ${rsme_path}/CombineTPM.tsv \
--ddr_file ~/WGS_Scripts/v2/PathWay/DDR.tsv \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--out_path ${Images_path}/ddr_gsva

## DDR画图
${expressionTime_rscript} ${scripts_path}/GSVA_DnaDamage_match.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--input_file ${Images_path}/ddr_gsva/gsva_ddr.tsv \
--out_path ${Images_path}/ddr_gsva

<<EOF
## DDR评分和基因表达
${expressionTime_rscript} ${scripts_path}/ddrScore_calculate.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--ddr_file ${Images_path}/ddr_gsva/gsva_ddr.MutipleStage.tsv \
--rsem_file_forcor ${rsme_path}/CombineTpm.FilterLowExpression-MergeMutiSample.tsv \
--out_path ${Images_path}/ddr_gsva
EOF

####################################################################################
## GSVA通用代码
## 胃干细胞评分
pathway_name=Gastric_stem_cells
mkdir -p ${Images_path}/${pathway_name}
${expressionTime_rscript} ${scripts_path}/GSVA_score.General.R \
--rsem_file ${rsme_path}/CombineTPM.tsv \
--pathway_file ${public_ref_path}/Gastric_stem_cells.marker.list \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--out_file ${Images_path}/${pathway_name}/${pathway_name}.gsva.tsv

${expressionTime_rscript} ${scripts_path}/GSVA_score.General.plot.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--input_file ${Images_path}/${pathway_name}/${pathway_name}.gsva.tsv \
--out_file ${Images_path}/${pathway_name}/${pathway_name}.gsva.pdf

####################################################################################
## 20220929
## 选择重要基因
## 增加是否为DDR
## 给基因判断证据等级
## level：既为已报道(胃癌的smg或cgc或DDR)、又在hallmarks通路、又是hub基因、又与干细胞评分相关
## leve2：为已报道(胃癌的smg或cgc或DDR)、hallmarks通路、hub基因、干细胞评分相关的任三
## leve3：为已报道(胃癌的smg或cgc或DDR)、hallmarks通路、hub基因、干细胞评分相关的任二
## leve4：为已报道(胃癌的smg或cgc或DDR)、hallmarks通路、hub基因、干细胞评分相关的任一
rm -rf ${Images_path}/importGene_v2
${Rscript} ${scripts_path}/selectImportantGene.addStem.addDDR_v2.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--igc_cluster_file ${Images_path}/mfuzz_v2/mfuzz_plot_IGC.annotation.tsv \
--dgc_cluster_file ${Images_path}/mfuzz_v2/mfuzz_plot_DGC.annotation.tsv \
--rsem_file ${Images_path}/DiffGene/CombineCounts.FilterLowExpression-MergeMutiSample.varianceStabilizingTransformation.tsv \
--stem_file ${Images_path}/stemness/StemScore_Expression.tsv \
--stem_tpm_file ${Images_path}/stemness/StemScore.MutipleStage.tsv \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--ddr_gene_file ${public_ref_path}/DDR.list \
--out_path ${Images_path}/importGene_v2

####################################################################################
## 关注基因的表达改变情况
show_genelist=("MUC6" "CDX2" "CDX1" "PREX1")
for gene in  ${show_genelist[@]}
do
## deseq2标准化counts
echo ${gene}
${Rscript} ${scripts_path}/showGene.Normalize.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--rsem_file ${Images_path}/DiffGene/CombineCounts.FilterLowExpression-MergeMutiSample.varianceStabilizingTransformation.tsv \
--out_path ${Images_path}/showGene \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--gene ${gene}

## TPM
${Rscript} ${scripts_path}/showGene.TPM.R \
--sample_list_file ${config_path}/tumor_normal.list  \
--rsem_file ${rsme_path}/CombineTpm.FilterLowExpression-MergeMutiSample.tsv \
--out_path ${Images_path}/showGene \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
--gene ${gene}
done